rm(list=ls(all=T))
knitr::opts_chunk$set(paged.print=FALSE, comment = NA)
pacman::p_load(magrittr, readr, caTools, ggplot2, dplyr, vcd,plotly,tidyr,latex2exp,Matrix)Z = read_csv("data/ta_feng_all_months_merged.csv") %>%
data.frame %>% setNames(c(
"date","cust","age","area","cat","prod","qty","cost","price"))Rows: 817741 Columns: 9
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (5): TRANSACTION_DT, CUSTOMER_ID, AGE_GROUP, PIN_CODE, PRODUCT_ID
dbl (4): PRODUCT_SUBCLASS, AMOUNT, ASSET, SALES_PRICE
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
[1] 817741
# A tibble: 817,741 × 9
date cust age area cat prod qty cost price
<chr> <chr> <chr> <chr> <dbl> <chr> <dbl> <dbl> <dbl>
1 11/1/2000 01104905 45-49 115 110411 4710199010372 2 24 30
2 11/1/2000 00418683 45-49 115 120107 4710857472535 1 48 46
3 11/1/2000 01057331 35-39 115 100407 4710043654103 2 142 166
4 11/1/2000 01849332 45-49 Others 120108 4710126092129 1 32 38
5 11/1/2000 01981995 50-54 115 100205 4710176021445 1 14 18
6 11/1/2000 01741797 35-39 115 110122 0078895770025 1 54 75
7 11/1/2000 00308359 60-64 115 110507 4710192225520 1 85 105
8 11/1/2000 01607000 35-39 221 520503 4712936888817 1 45 68
9 11/1/2000 01057331 35-39 115 320203 4715398106864 2 70 78
10 11/1/2000 00236645 35-39 Unknown 120110 4710126091870 1 43 53
# ℹ 817,731 more rows
瞭解顧客的年齡客群與分布地區
age.group = c("<25","25-29","30-34","35-39","40-44",
"45-49","50-54","55-59","60-64",">65")
Z$age = c(paste0("a",seq(24,69,5)),"a99")[match(Z$age,age.group,11)]
Z$area = paste0("z",Z$area)par(mfrow=c(1,2),cex=0.7)
table(Z$age, useNA='ifany') %>% barplot(main="Age Groups", las=2)
table(Z$area,useNA='ifany') %>% barplot(main="Areas", las=2)經由上圖可以發現顧客年齡多為29-44為主,而區域則是z115(南港)與z221(汐止)為主。
qty cost price
99% 6 858.0 1014.00
99.9% 14 2722.0 3135.82
99.95% 24 3799.3 3999.00
[1] 817182
把每一天、每一個顧客的交易項目彙總為一張訂單
Warning: The `...` argument of `group_indices()` is deprecated as of dplyr 1.0.0.
ℹ Please `group_by()` first
Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
generated.
X = Z %>% group_by(tid) %>% summarise(
date = min(date), # 交易日期
cust = min(cust), # 顧客 ID
age = min(age), # 顧客 年齡級別
area = min(area), # 顧客 居住區別
items = n(), # 交易項目(總)數
pieces = sum(qty), # 產品(總)件數
total = sum(price), # 交易(總)金額
gross = sum(price - cost) # 毛利
) %>% data.frame
nrow(X)[1] 119422
items pieces total gross
99.9% 54 81.0000 9009.579 1824.737
99.95% 62 94.2895 10611.579 2179.817
99.99% 82 133.0000 16044.401 3226.548
tid date cust age
Min. : 1 Min. :2000-11-01 Length:119328 Length:119328
1st Qu.: 29855 1st Qu.:2000-11-29 Class :character Class :character
Median : 59705 Median :2001-01-01 Mode :character Mode :character
Mean : 59712 Mean :2000-12-31
3rd Qu.: 89581 3rd Qu.:2001-02-02
Max. :119422 Max. :2001-02-28
area items pieces total
Length:119328 Min. : 1.000 Min. : 1.000 Min. : 5.0
Class :character 1st Qu.: 2.000 1st Qu.: 3.000 1st Qu.: 227.0
Mode :character Median : 5.000 Median : 6.000 Median : 510.0
Mean : 6.802 Mean : 9.222 Mean : 851.6
3rd Qu.: 9.000 3rd Qu.:12.000 3rd Qu.: 1080.0
Max. :62.000 Max. :94.000 Max. :15345.0
gross
Min. :-1645.0
1st Qu.: 21.0
Median : 68.0
Mean : 130.9
3rd Qu.: 168.0
Max. : 3389.0
d0 = max(X$date) + 1
A = X %>% mutate(
days = as.integer(difftime(d0, date, units="days"))
) %>% group_by(cust) %>% summarise(
TID = min(tid),
r = min(days), # recency
s = max(days), # seniority
f = n(), # frquency
m = mean(total), # monetary
rev = sum(total), # total revenue contribution
raw = sum(gross), # total gross profit contribution
age = min(age), # age group
area = min(area), # area code
) %>% data.frame
nrow(A) # 32241[1] 32241
par(mfrow=c(1,2),cex=0.7)
table(A$age, useNA='ifany') %>% barplot(main="Age Groups",las=2)
table(A$area, useNA='ifany') %>% barplot(main="Areas",las=2) cust TID r s
Length:32241 Min. : 1 Min. : 1.00 Min. : 1.00
Class :character 1st Qu.: 11363 1st Qu.: 9.00 1st Qu.: 56.00
Mode :character Median : 29799 Median : 26.00 Median : 92.00
Mean : 39261 Mean : 37.45 Mean : 80.78
3rd Qu.: 61602 3rd Qu.: 60.00 3rd Qu.:110.00
Max. :119422 Max. :120.00 Max. :120.00
f m rev raw
Min. : 1.000 Min. : 8.0 Min. : 8 Min. : -784.0
1st Qu.: 1.000 1st Qu.: 365.0 1st Qu.: 707 1st Qu.: 75.0
Median : 2.000 Median : 705.7 Median : 1750 Median : 241.0
Mean : 3.701 Mean : 993.1 Mean : 3152 Mean : 484.6
3rd Qu.: 4.000 3rd Qu.: 1291.0 3rd Qu.: 3968 3rd Qu.: 612.0
Max. :85.000 Max. :12636.0 Max. :127686 Max. :20273.0
age area
Length:32241 Length:32241
Class :character Class :character
Mode :character Mode :character
par(mfrow=c(3,2), mar=c(3,3,4,2))
for(x in c('r','s','f','m'))
hist(A[,x],freq=T,main=x,xlab="",ylab="",cex.main=2)
hist(pmin(A$f,10),0:10,freq=T,xlab="",ylab="",cex.main=2)
hist(log(A$m,10),freq=T,xlab="",ylab="",cex.main=2)將剛剛處理完的資料全部重新儲存。
date cust age area cat prod qty cost price tid
0 0 0 0 0 0 0 0 0 0
tid date cust age area items pieces total gross
0 0 0 0 0 0 0 0 0
cust TID r s f m rev raw age area
0 0 0 0 0 0 0 0 0 0
由於a99(沒有年齡資料的顧客)人數不多,而且特徵很獨特,探索時我們可以考慮濾掉這群顧客
A0 %>% filter(age!="a99") %>% # 濾掉沒有年齡資料的顧客('a99')
group_by(age) %>% summarise(
TID = min(TID),
Group.Size = n(), # 族群人數
avg.Freq = mean(f), # 平均購買次數
avg.Revenue = sum(f*m)/sum(f) # 平均客單價
) %>%
ggplot(aes(y=avg.Freq, x=avg.Revenue)) +
geom_point(aes(col=age, size=Group.Size), alpha=0.5) +
geom_text(aes(label=age)) +
scale_size(range=c(5,25)) +
theme_bw() + theme(legend.position="none") +
ggtitle("年齡區隔特徵 (泡泡大小:族群人數)") +
ylab("平均購買次數") + xlab("平均客單價")
經上面的泡泡圖顯示,平均客單價較高的客群為a34與a39。
A0 %>% filter(age!="a99") %>% # 濾掉沒有年齡資料的顧客('a99')
group_by(area) %>% summarise(
TID = min(TID),
Group.Size = n(), # 族群人數
avg.Freq = mean(f), # 平均購買次數
avg.Revenue = sum(f*m)/sum(f) # 平均客單價
) %>%
ggplot(aes(y=avg.Freq, x=avg.Revenue)) +
geom_point(aes(col=area, size=Group.Size), alpha=0.5) +
geom_text(aes(label=area)) +
scale_size(range=c(5,25)) +
theme_bw() + theme(legend.position="none") +
ggtitle("地理區隔特徵 (泡泡大小:族群人數)") +
ylab("平均購買次數") + xlab("平均客單價")故綜上面顯示的兩張泡泡圖分析,我們將年齡與地區作為我們的TA篩選條件。 首先,我們先選出了a34與a39的合併組合,因為從第一張泡泡圖可看出此組合的平均客單價為最高,故此組合為一個可利用行銷而提高收益的對象。
再來,我們依地區分布再選出了z115與z221,而理由是z115在上述的年齡組合的消費人數較少,我們希望利用行銷手法來拓展潛在客群,並且提升在Z115的消費人數;z221地區則在上述年齡的組合的消費人數較高,故我們希望將此常客族群繼續留住,再提高他們對TaFeng的忠誠度。
這裡將兩群TA篩選出來,而可看到兩群人數為總顧客的十分之一,有達到在行銷人數條件的標準
X = group_by(Z, tid) %>% summarise(
date = first(date), # date of transaction
cust = first(cust), # customer id
age = first(age), # age group
area = first(area), # area group
items = n(), # number of items
pieces = sum(qty), # number of pieces
total = sum(price), # total amount
gross = sum(price - cost) # raw profit
) %>% data.frame # 88387 交易筆數 tid date cust age
Min. : 1 Min. :2000-11-01 Length:88387 Length:88387
1st Qu.:22098 1st Qu.:2000-11-23 Class :character Class :character
Median :44194 Median :2000-12-12 Mode :character Mode :character
Mean :44194 Mean :2000-12-15
3rd Qu.:66291 3rd Qu.:2001-01-12
Max. :88387 Max. :2001-01-31
area items pieces total
Length:88387 Min. : 1.000 Min. : 1.000 Min. : 5.0
Class :character 1st Qu.: 2.000 1st Qu.: 3.000 1st Qu.: 230.0
Mode :character Median : 5.000 Median : 6.000 Median : 522.0
Mean : 6.994 Mean : 9.453 Mean : 888.7
3rd Qu.: 9.000 3rd Qu.: 12.000 3rd Qu.: 1120.0
Max. :112.000 Max. :339.000 Max. :30171.0
gross
Min. :-1645.0
1st Qu.: 23.0
Median : 72.0
Mean : 138.3
3rd Qu.: 174.0
Max. : 8069.0
items pieces total gross
99.9% 56.0000 84.0000 9378.684 1883.228
99.95% 64.0000 98.0000 11261.751 2317.087
99.99% 85.6456 137.6456 17699.325 3389.646
d0 = max(X$date) + 1
A = X %>% mutate(
days = as.integer(difftime(d0, date, units="days"))
) %>%
group_by(cust) %>% summarise(
TID = min(tid),
r = min(days), # recency
s = max(days), # seniority
f = n(), # frequency
m = mean(total), # monetary(平均客單價)
rev = sum(total), # total revenue contribution(總營收貢獻)
raw = sum(gross), # total gross profit contribution(總淨利貢獻)
age = age[1], # age group
area = area[1], # area code
) %>% data.frame # 28584 顧客
nrow(A)[1] 28584
FALSE TRUE
FALSE 15342 0
TRUE 0 13242
cust TID r s
Length:28584 Min. : 1 Min. : 1.00 Min. : 1.00
Class :character 1st Qu.: 9852 1st Qu.:11.00 1st Qu.:47.00
Mode :character Median :25162 Median :21.00 Median :68.00
Mean :31046 Mean :32.12 Mean :61.27
3rd Qu.:48834 3rd Qu.:53.00 3rd Qu.:83.00
Max. :88387 Max. :92.00 Max. :92.00
f m rev raw
Min. : 1.000 Min. : 8.0 Min. : 8 Min. : -742.0
1st Qu.: 1.000 1st Qu.: 359.4 1st Qu.: 638 1st Qu.: 70.0
Median : 2.000 Median : 709.5 Median : 1566 Median : 218.0
Mean : 3.089 Mean : 1012.4 Mean : 2711 Mean : 420.8
3rd Qu.: 4.000 3rd Qu.: 1315.0 3rd Qu.: 3426 3rd Qu.: 535.0
Max. :60.000 Max. :10634.0 Max. :99597 Max. :15565.0
age area amount buy
Length:28584 Length:28584 Min. : 8 Mode :logical
Class :character Class :character 1st Qu.: 454 FALSE:15342
Mode :character Mode :character Median : 993 TRUE :13242
Mean : 1499
3rd Qu.: 1955
Max. :28089
NA's :15342
#z115_ca在2月的購買人數
z115_cu_A1 = matrix(NA, nrow(z115_cu),ncol(z115_cu))
z115_cu_A1 = A[A$TID %in% z115_cu$TID,]
table(z115_cu_A1$buy)
FALSE TRUE
1462 1808
依上面結果,z221(汐止)的客群在2月的購買人數為1623。
#z221_cu在2月的購買人數
z221_cu_A1 = matrix(NA, nrow(z221_cu),ncol(z221_cu))
z221_cu_A1 = A[A$TID %in% z221_cu$TID,]
table(z221_cu_A1$buy)
FALSE TRUE
1715 1623
依上面結果,z221(南港)的客群在2月的購買人數為1808。
Data for 購買機率模型與購買金額模型
- A之中每一筆資料都可以拿來做購買機率模型
- 但只有A$amount有值的資料可以拿來做購買金額模型 : A2
- A和A2都需要在相同的目標變數分佈切割成訓練測試(TR)與測試資料(TS)
-
我們分別用caTools::sample.split()和隨機抽樣(sample)來製作分割向量:spl,spl2
z115的購買機率模型
#z115的購買機率模型
z115_X1 = subset(X, cust %in% z115_cu_A1$cust & date < as.Date("2001-02-01"))
z115_Z1 = subset(Z, cust %in% z115_cu_A1$cust & date < as.Date("2001-02-01"))
set.seed(2018)
z115_spl = sample.split(z115_cu_A1$buy, SplitRatio = 0.7)
c(nrow(z115_cu_A1),sum(z115_spl),sum(!z115_spl))[1] 3270 2289 981
FALSE TRUE
0.5524975 0.5530799
cbind(z115_cu_A1, z115_spl) %>% filter(buy) %>%
ggplot(aes(x=log(amount))) + geom_density(aes(fill=z115_spl), alpha=0.5)z115的購買機率金額
z115_A2 = subset(z115_cu_A1, buy) %>% mutate_at(c("m","rev","amount"), log10)
n = nrow(z115_A2)
#隨機抽樣製作分割向量spl2
set.seed(2018); z115_spl2 = 1:n %in% sample(1:n, round(0.7*n))
c(nrow(z115_A2), sum(z115_spl2), sum(!z115_spl2))[1] 1808 1266 542
[1] 0.7002212
z221購買機率模型
z221_X1 = subset(X, cust %in% z221_cu_A1$cust & date < as.Date("2001-02-01"))
z221_Z1 = subset(Z, cust %in% z221_cu_A1$cust & date < as.Date("2001-02-01"))
set.seed(2018)
z221_spl = sample.split(z221_cu_A1$buy, SplitRatio = 0.7)
c(nrow(z221_cu_A1),sum(z221_spl),sum(!z221_spl))[1] 3338 2336 1002
FALSE TRUE
0.4860279 0.4863014
cbind(z221_cu_A1, z221_spl) %>% filter(buy) %>%
ggplot(aes(x=log(amount))) + geom_density(aes(fill=z221_spl), alpha=0.5)z221的購買金額模型
z221_A2 = subset(z221_cu_A1, buy) %>% mutate_at(c("m","rev","amount"), log10)
n = nrow(z221_A2)
#隨機抽樣製作分割向量spl2
set.seed(2018); z221_spl2 = 1:n %in% sample(1:n, round(0.7*n))
c(nrow(z221_A2), sum(z221_spl2), sum(!z221_spl2))[1] 1623 1136 487
[1] 0.6999384
使用spl切割機率模型的訓練(TR)與測試資料(TS)
z115目標客群的測試資料預測
Call:
glm(formula = buy ~ ., family = binomial(), data = z115_TR[,
c(3:9, 12)])
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -7.929e-01 1.620e-01 -4.893 9.91e-07 ***
r -1.301e-02 2.562e-03 -5.080 3.78e-07 ***
s 7.731e-03 2.630e-03 2.939 0.00329 **
f 2.887e-01 4.198e-02 6.878 6.05e-12 ***
m 3.663e-05 8.637e-05 0.424 0.67146
rev 4.363e-05 5.866e-05 0.744 0.45700
raw -3.742e-04 2.513e-04 -1.489 0.13640
agea39 -3.868e-02 9.391e-02 -0.412 0.68039
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 3147.4 on 2288 degrees of freedom
Residual deviance: 2644.2 on 2281 degrees of freedom
AIC: 2660.2
Number of Fisher Scoring iterations: 5
z115_pred = predict(z115_glm1, z115_TS, type = "response")
z115_cm = table(actual = z115_TS$buy, predict = z115_pred > 0.5);z115_cm predict
actual FALSE TRUE
FALSE 314 125
TRUE 181 361
[1] 0.4475025 0.6880734
[,1]
FALSE vs. TRUE 0.7513722
z221目標客群的測試資料預測
Call:
glm(formula = buy ~ ., family = binomial(), data = z221_TR[,
c(3:9, 12)])
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -1.133e+00 1.597e-01 -7.097 1.27e-12 ***
r -1.326e-02 2.533e-03 -5.234 1.66e-07 ***
s 1.163e-02 2.616e-03 4.445 8.79e-06 ***
f 2.682e-01 4.534e-02 5.916 3.30e-09 ***
m 2.365e-05 7.547e-05 0.313 0.754
rev 5.172e-05 4.865e-05 1.063 0.288
raw -3.040e-04 2.235e-04 -1.360 0.174
agea39 -4.673e-02 9.150e-02 -0.511 0.610
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 3236.6 on 2335 degrees of freedom
Residual deviance: 2780.5 on 2328 degrees of freedom
AIC: 2796.5
Number of Fisher Scoring iterations: 5
z221_pred = predict(z221_glm1, z221_TS, type = "response")
z221_cm = table(actual = z221_TS$buy, predict = z221_pred > 0.5);z221_cm predict
actual FALSE TRUE
FALSE 391 124
TRUE 205 282
[1] 0.5139721 0.6716567
[,1]
FALSE vs. TRUE 0.7369989
z115客群預測未來購買金額
z115_A2 = subset(z115_cu_A1, z115_cu_A1$buy) %>% mutate_at(c("m","rev","amount"), log10)
z115_TR2 = subset(z115_A2, z115_spl2)
z115_TS2 = subset(z115_A2, !z115_spl2)
Call:
lm(formula = amount ~ ., data = z115_TR2[, c(3:9, 11)])
Residuals:
Min 1Q Median 3Q Max
-2.04482 -0.21936 0.04393 0.27321 1.36192
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.406e+00 1.175e-01 11.964 < 2e-16 ***
r 4.926e-04 8.326e-04 0.592 0.55422
s -2.902e-05 8.349e-04 -0.035 0.97228
f 2.328e-02 5.411e-03 4.302 1.82e-05 ***
m 4.745e-01 1.042e-01 4.552 5.83e-06 ***
rev 2.308e-02 9.836e-02 0.235 0.81452
raw 6.501e-05 2.048e-05 3.174 0.00154 **
agea39 1.138e-02 2.316e-02 0.491 0.62328
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.4098 on 1258 degrees of freedom
Multiple R-squared: 0.2777, Adjusted R-squared: 0.2737
F-statistic: 69.09 on 7 and 1258 DF, p-value: < 2.2e-16
z115_r2.tr = summary(z115_lm1)$r.sq
z115_SST = sum((z115_TS2$amount - mean(z115_TR2$amount))^ 2)
z115_SSE = sum((predict(z115_lm1, z115_TS2) - z115_TS2$amount)^2)
z115_r2.ts = 1 - (z115_SSE/z115_SST)
c(z115_R2train=z115_r2.tr, z115_R2test=z115_r2.ts)#模型可以解釋testing dataset 將近2成的變異z115_R2train z115_R2test
0.2776940 0.3015492
z221客群預測未來購買金額
z221_A2 = subset(z221_cu_A1, z221_cu_A1$buy) %>% mutate_at(c("m","rev","amount"), log10)
z221_TR2 = subset(z221_A2, z221_spl2)
z221_TS2 = subset(z221_A2, !z221_spl2)
Call:
lm(formula = amount ~ ., data = z221_TR2[, c(3:9, 11)])
Residuals:
Min 1Q Median 3Q Max
-1.8689 -0.2432 0.0557 0.3053 1.3045
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.439e+00 1.325e-01 10.861 < 2e-16 ***
r 7.535e-04 8.610e-04 0.875 0.381705
s 9.269e-05 8.755e-04 0.106 0.915699
f 2.014e-02 6.952e-03 2.897 0.003837 **
m 4.231e-01 1.126e-01 3.759 0.000179 ***
rev 5.801e-02 1.051e-01 0.552 0.581162
raw 6.230e-05 2.670e-05 2.333 0.019816 *
agea39 1.886e-02 2.527e-02 0.746 0.455802
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.4237 on 1128 degrees of freedom
Multiple R-squared: 0.2264, Adjusted R-squared: 0.2216
F-statistic: 47.17 on 7 and 1128 DF, p-value: < 2.2e-16
z221_r2.tr = summary(z221_lm1)$r.sq
z221_SST = sum((z221_TS2$amount - mean(z221_TR2$amount))^ 2)
z221_SSE = sum((predict(z221_lm1, z221_TS2) - z221_TS2$amount)^2)
z221_r2.ts = 1 - (z221_SSE/z221_SST)
c(z221_R2train=z221_r2.tr, z221_R2test=z221_r2.ts)#模型可以解釋testing dataset 將近2成的變異z221_R2train z221_R2test
0.2264430 0.2253829
load("data/tf0.rdata")
d0 = max(X0$date) + 1
B = X0 %>%
filter(date >= as.Date("2000-12-01")) %>%
mutate(days = as.integer(difftime(d0, date, units="days"))) %>%
group_by(cust) %>% summarise(
r = min(days), # recency
s = max(days), # seniority
f = n(), # frequency
m = mean(total), # monetary
rev = sum(total), # total revenue contribution
raw = sum(gross), # total gross profit contribution
age = age[1], # age group
area = area[1], # area code
TID = min(tid)
) %>% data.frame # 28531
nrow(B)[1] 28531
z115購買機率
z115_B2 = z115_B %>% mutate_at(c("m","rev"), log10)
z115_B$Rev = 10^predict(z115_lm1, z115_B2)
#"m","rev"兩個變數和目標變數取log
#"rev"為預期購買金額
par(mfrow=c(1,2), cex=0.8)
hist(z115_B$Buy)
hist(log(z115_B$Rev,10))z221購買機率
z221_B2 = z221_B %>% mutate_at(c("m","rev"), log10)
z221_B$Rev = 10^predict(z221_lm1, z221_B2)
#"m","rev"兩個變數和目標變數取log
#"rev"為預期購買金額
par(mfrow=c(1,2), cex=0.8)
hist(z221_B$Buy)
hist(log(z221_B$Rev,10))接著透過計算顧客終生價值瞭解每一個顧客的潛在價值有多大。
g = 0.3 # margin
N = 36 # period(一期一個月)
d = 0.01 # interest rate
z115_B$CLV = g * z115_B$Rev * rowSums(sapply(
0:N, function(i) (z115_B$Buy/(1+d))^i ) )
summary(z115_B$CLV)#該顧客的終生價值,未來3年的營收貢獻 Min. 1st Qu. Median Mean 3rd Qu. Max.
35.9 291.3 455.7 617.7 696.3 14799.0
S曲線
- S-Curve : 許多管理工具都呈現S型的成本效益函數
- 以R內建的邏輯式函數(plogis())來模擬S曲線 \[\Delta P(x|m,b,a) = m \cdot Logis(\frac{10(x -
b)}{a})\]
DP = function(x,m0,b0,a0) {m0*plogis((10/a0)*(x-b0))}
par(mar=c(4,4,2,1),cex=0.7)
curve(DP(x,m=0.20,b=30,a=40), 0, 60, lwd=2, ylim=c(0, 0.25),
main="F( x | m=0.2, b=30, a=40 )", ylab="delta P")
abline(h=seq(0,0.2,0.05),v=seq(0,60,5),col='lightgrey',lty=2)
透過這3個parameters(參數):
有了行銷工具的成本效益函數之後,我們就可以估計將這個工具用在每一位顧客上的時候的預期效益:
\[\hat{R}(x) = \left\{\begin{matrix} \Delta P \cdot M \cdot margin - x & , & P + \Delta P \leq 1\\ (1-P) \cdot M \cdot margin - x & , & else \end{matrix}\right.\]
結合以下, - 預測 (\(P, M\)) : 每位顧客的預期購買機率和購買金額, - 假設 (\(\Delta P(x|m,b,a)\)) : 行銷工具帶來的再購機率增額 我們就可以估計這個工具用在每位顧客上的預期效益 \(\hat{R}(x)\)。
🌻 注意\(\Delta P\) 和 \(\hat{R}\) 都是藉由函數 \(x\) 的 \(m,b,a\) 所得到
z221客群的淨收益
#z115族客群的示意圖(改mba)
m=0.25; b=20; a=30; x=27
z115_dp = pmin(1-z115_B$Buy, DP(x,m,b,a))
z115_eR = z115_dp*z115_B$Rev*margin - x
hist(z115_eR,main="TA1預期淨收益分佈(m=0.20; b=20; a=30;x=27)",xlab="z115_a3439預期淨收益",ylab="z115_a3439顧客人數")z221客群的淨收益
#z221族群的示意圖(改mba)
m=0.2; b=20; a=30; x=27
z221_dp = pmin(1-z221_B$Buy, DP(x,m,b,a))
z221_eR = z221_dp*z221_B$Rev*margin - x
hist(z221_eR,main="TA2預期淨收益分佈(m=0.20; b=20; a=30;x=27)",xlab="z221_a3439預期淨收益",ylab="z221_a3439顧客人數")一個行銷工具給定工具參數(m,b,a),我們可在其有效成本範圍(x∈[b−a2,b+a2] )之內,估計工具的效果: - eReturn : 對所有的人行銷的總預期收益 - N : 預期收益大於零的人數 - eReturn2 : 只對期收益大於零的人做行銷的總預期收益如何隨成本變化。
mm=c(0.15, 0.25)
bb=c( 10, 25)
aa=c( 20, 30)
X = seq(0,60,2)
do.call(rbind, lapply(1:length(mm), function(i) data.frame(
Inst = ifelse(i == 1, "折價券", "累點"), Cost=X,
Gain=DP(X,mm[i],bb[i],aa[i])
))) %>% data.frame %>%
ggplot(aes(x=Cost, y=Gain, col=Inst)) +
geom_line(size=1.5,alpha=0.5) + theme_bw() +
ggtitle("Prob. Function: f(x|m,b,a)")Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.
This warning is displayed once every 8 hours.
Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
generated.
mm=c(0.2, 0.25)
bb=c( 20, 20)
aa=c( 30, 20)
X = seq(0,60,2)
do.call(rbind, lapply(1:length(mm), function(i) data.frame(
Inst = ifelse(i == 1, "折價券", "累點"), Cost=X,
Gain=DP(X,mm[i],bb[i],aa[i])
))) %>% data.frame %>%
ggplot(aes(x=Cost, y=Gain, col=Inst)) +
geom_line(size=1.5,alpha=0.5) + theme_bw() +
ggtitle("Prob. Function: f(x|m,b,a)")X = seq(10, 60, 1)
df = do.call(rbind, lapply(1:length(mm), function(i) {
sapply(X, function(x) {
z115_dp = pmin(1-z115_B$Buy, DP(x,mm[i],bb[i],aa[i]))
z115_eR = z115_dp*z115_B$Rev*margin - x
c(i=i, x=x, z115_eR.ALL=sum(z115_eR), N=sum(z115_eR>0), z115_eR.SEL=sum(z115_eR[z115_eR > 0]) )
}) %>% t %>% data.frame
}))
df %>%
mutate_at(vars(z115_eR.ALL, z115_eR.SEL), function(y) round(y/1000)) %>%
gather('key','value',-i,-x) %>%
mutate(Inst = ifelse(i == 1, "折價券", "累點")) %>%
ggplot(aes(x=x, y=value, col=Inst)) +
geom_hline(yintercept=0, linetype='dashed', col='blue') +
geom_line(size=1.5,alpha=0.5) +
xlab('工具選項(成本)') + ylab('預期收益($K)') +
ggtitle('行銷工具優化','假設行銷工具的效果是其成本的函數') +
facet_wrap(~key,ncol=1,scales='free_y') + theme_bw() -> p
plotly::ggplotly(p)# A tibble: 2 × 5
# Groups: i [2]
i x z115_eR.ALL N z115_eR.SEL
<dbl> <dbl> <dbl> <dbl> <dbl>
1 1 27 418. 623 7757.
2 2 26 13359. 962 17144.
mm=c(0.25, 0.2)
bb=c( 15, 25)
aa=c( 20, 30)
X = seq(0,60,2)
do.call(rbind, lapply(1:length(mm), function(i) data.frame(
Inst = ifelse(i == 1, "折價券", "累點"), Cost=X,
Gain=DP(X,mm[i],bb[i],aa[i])
))) %>% data.frame %>%
ggplot(aes(x=Cost, y=Gain, col=Inst)) +
geom_line(size=1.5,alpha=0.5) + theme_bw() +
ggtitle("Prob. Function: f(x|m,b,a)")X = seq(10, 60, 1)
df = do.call(rbind, lapply(1:length(mm), function(i) {
sapply(X, function(x) {
z221_dp = pmin(1-z221_B$Buy, DP(x,mm[i],bb[i],aa[i]))
z221_eR = z221_dp*z221_B$Rev*margin - x
c(i=i, x=x, z221_eR.ALL=sum(z221_eR), N=sum(z221_eR>0), z221_eR.SEL=sum(z221_eR[z221_eR > 0]) )
}) %>% t %>% data.frame
}))
df %>%
mutate_at(vars(z221_eR.ALL, z221_eR.SEL), function(y) round(y/1000)) %>%
gather('key','value',-i,-x) %>%
mutate(Inst = ifelse(i == 1, "折價券", "累點")) %>%
ggplot(aes(x=x, y=value, col=Inst)) +
geom_hline(yintercept=0, linetype='dashed', col='blue') +
geom_line(size=1.5,alpha=0.5) +
xlab('工具選項(成本)') + ylab('預期收益($K)') +
ggtitle('行銷工具優化','假設行銷工具的效果是其成本的函數') +
facet_wrap(~key,ncol=1,scales='free_y') + theme_bw() -> p
plotly::ggplotly(p)根據預期的總效益顯示,有了以下的行銷策略:
1.
集點加購送鍋具適用於z115(南港)的客群,總觸及人數有962人,預期效益為17144。
2.
滿千送95折折價券(規定於下次使用)則適用於z221的客群(汐止區),總觸及會有1352人,預期效益30366。
故總觸及人數會有2314人,總結效益為47510。